package tvkingdom

import (
	"fmt"
	"github.com/gocolly/colly"
	"github.com/gogf/gf/v2/frame/g"
	"github.com/gogf/gf/v2/os/gctx"
	"github.com/gogf/gf/v2/os/gtime"
	"github.com/gogf/gf/v2/text/gregex"
	"github.com/gogf/gf/v2/text/gstr"
	"time"
	"uni-crawl-frame/core"
	"uni-crawl-frame/db/mysql/model/entity"
	"uni-crawl-frame/service/crawl/replayservice"
	"uni-crawl-frame/utils/timeutil"
)

// TvkingdomCrawl is the crawler for tvkingdom program schedules. It embeds
// *core.AbstractCrawlReplayUrl.
//
// Notes on configuring seed URLs:
//  1. tv.so-net.ne.jp redirects to www.tvkingdom.jp, so only tvkingdom needs
//     to be entered.
//  2. Only enter the URL up to *.action
//     (e.g. https://www.tvkingdom.jp/chart/23.action); GET parameters after it
//     do not affect the part of the schedule layout this crawler cares about.
//  3. A single *.action page contains several TV stations; the station column
//     XPath is //div[contains(@id,'cell-station-top')].
type TvkingdomCrawl struct {
	*core.AbstractCrawlReplayUrl
}

// CreateProgram crawls the schedule page at replayConfig.SeedUrl and saves one
// program task per schedule link found for the matched station.
//
// The page is visited twice: the first pass locates the station header div
// (id prefixed with "cell-station-top-") to learn the station number, the
// second pass collects the schedule links inside today's cell for that
// station. Each program's end time is the start time of the following program;
// the last program has no successor and is given a fixed one-hour duration.
//
// Parameters:
//   - replayConfig: supplies the seed URL, the config id and the host stored on
//     every created task.
//   - manifestTask: supplies the manifest id stored on every created task.
//
// The method has no return value, so failures (visit errors, links whose href
// or start time cannot be parsed) are logged and the offending link is skipped
// instead of panicking or saving a task with a zero start time.
func (receiver *TvkingdomCrawl) CreateProgram(replayConfig *entity.CmsCrawlReplayConfig, manifestTask *entity.CmsCrawlReplayManifestTask) {
	ctx := gctx.GetInitCtx()
	log := g.Log().Line()

	const stationIdPrefix = "cell-station-top-"

	stationColl := colly.NewCollector()
	// For now only the first station's schedule is taken.
	stationColl.OnXML(fmt.Sprintf("//div[contains(@id,'%s')][1]", stationIdPrefix), func(element *colly.XMLElement) {
		stationName := element.Attr("title")
		log.Infof(ctx, "tvkingdom电视台名称 = %s", stationName)

		// The station number is whatever follows the prefix in the div id.
		tvNo := gstr.SubStr(element.Attr("id"), len(stationIdPrefix))
		dayCellId := "cell-" + tvNo + time.Now().Format(timeutil.YYYY_MM_DD_JOIN)
		// Quote tvNo and escape the dot so both are matched literally.
		hrefPattern := fmt.Sprintf(`/schedule/%s(.*)\.action`, gregex.Quote(tvNo))

		// Kept local to this callback so that, should the XPath match more
		// than one station div, tasks of one station are never mixed with or
		// re-saved alongside those of another.
		var programTasks []*entity.CmsCrawlReplayProgramTask

		programColl := colly.NewCollector()
		programColl.OnXML(fmt.Sprintf("//div[contains(@id,'%s')]//a[@class='schedule-link']", dayCellId), func(link *colly.XMLElement) {
			title := gstr.TrimAll(link.Text)
			href := link.Attr("href")

			matches, err := gregex.MatchString(hrefPattern, href)
			if err != nil {
				log.Errorf(ctx, "tvkingdom: invalid href pattern %q: %v", hrefPattern, err)
				return
			}
			if len(matches) < 2 {
				// No match yields an empty result; indexing it would panic.
				log.Warningf(ctx, "tvkingdom: skip program %q, unexpected href %q", title, href)
				return
			}
			timeStr := matches[1]
			log.Infof(ctx, "节目 = %s. start at %s", title, timeStr)

			// NOTE(review): time.Parse yields UTC when the layout carries no
			// zone; confirm this matches how start times are meant to be stored.
			startTime, err := time.Parse(timeutil.YYYY_MM_DD_HH_MM_JOIN, timeStr)
			if err != nil {
				log.Warningf(ctx, "tvkingdom: skip program %q, bad start time %q: %v", title, timeStr, err)
				return
			}

			programTask := new(entity.CmsCrawlReplayProgramTask)
			programTask.ManifestId = manifestTask.Id
			programTask.ConfigId = replayConfig.Id
			programTask.ProgramName = title
			programTask.CrawlStatus = replayservice.ProgramTaskInit
			programTask.ProgramStartTime = gtime.NewFromTime(startTime)
			programTask.ProgramNo = programTask.ProgramStartTime.TimestampMilliStr()
			programTask.HostIp = replayConfig.Host
			programTask.CreateTime = gtime.Now()

			programTasks = append(programTasks, programTask)
		})

		programColl.OnScraped(func(_ *colly.Response) {
			for i, programTask := range programTasks {
				var nextStart *gtime.Time
				if i+1 < len(programTasks) {
					// The next program's start time is the current program's end time.
					nextStart = programTasks[i+1].ProgramStartTime
				} else {
					// The last program's duration cannot be derived, so assign one manually.
					nextStart = programTask.ProgramStartTime.Add(time.Hour)
				}

				// Whole seconds only: any sub-second remainder is dropped.
				sec := (nextStart.TimestampMilli() - programTask.ProgramStartTime.TimestampMilli()) / 1000
				programTask.ProgramEndTime = programTask.ProgramStartTime.Add(gtime.S * time.Duration(sec))
				replayservice.CheckAndSave(programTask)
			}
		})

		if err := programColl.Visit(replayConfig.SeedUrl); err != nil {
			log.Errorf(ctx, "tvkingdom: visit %q for program list failed: %v", replayConfig.SeedUrl, err)
		}
	})

	if err := stationColl.Visit(replayConfig.SeedUrl); err != nil {
		log.Errorf(ctx, "tvkingdom: visit %q for station list failed: %v", replayConfig.SeedUrl, err)
	}
}
